package com.mark.search.spider;

import com.alibaba.fastjson.JSON;
import com.mark.search.dto.EsSearch;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;

/**
 * <p>
 * Data-scraping module: a WebMagic {@link PageProcessor} that pulls a title
 * and a content fragment out of each downloaded page and stores them in an
 * {@link EsSearch} object.
 * </p>
 *
 * <p>
 * NOTE(review): a single {@link #esSearch} instance is reused for every page,
 * so each call to {@link #process(Page)} overwrites the previous result. If
 * this processor is driven by more than one crawler thread the values may
 * interleave — confirm against the caller.
 * </p>
 *
 * @author : mark
 * @since : 2022/7/20 10:34
 */
@Service
@Slf4j
public class SpiderService implements PageProcessor {

    /** XPath of the heading element used as the document name. */
    private static final String TITLE_XPATH = "//*[@id=\"Main\"]/div[2]/div[1]/h1";

    /** XPath of the element whose markup is stored as the document content. */
    private static final String CONTENT_XPATH = "//*[@id=\"Main\"]/div[2]/div[3]/div/div";

    /** Holder for the most recently processed page's id, name and content. */
    EsSearch esSearch = new EsSearch();

    /**
     * Identifier copied into {@link #esSearch} on every processed page.
     * It is never assigned inside this class; callers are expected to set it
     * before the crawl starts.
     */
    public String id;

    /**
     * Extracts the title and content from the page and records them, together
     * with the current {@link #id}, in {@link #esSearch}.
     *
     * @param page the downloaded page handed over by WebMagic
     */
    @Override
    public void process(Page page) {
        Html html = page.getHtml();
        esSearch.setId(id);

        // NOTE(review): the selector result is presumably null when the XPath
        // matches nothing, in which case null is stored — verify this is intended.
        String title = html.xpath(TITLE_XPATH).toString();
        esSearch.setName(title);

        String content = html.xpath(CONTENT_XPATH).toString();
        esSearch.setContent(content);

        log.info("抓取到的数据：{}", JSON.toJSON(content));
    }

    /**
     * Supplies the crawl configuration for this processor.
     *
     * <p>
     * Returns a fresh default {@link Site} rather than {@code null}: WebMagic's
     * {@code Spider} dereferences the site it obtains from the processor, so a
     * {@code null} here makes the crawl fail with a NullPointerException.
     * </p>
     *
     * @return a new {@link Site} with WebMagic's default settings, never {@code null}
     */
    @Override
    public Site getSite() {
        return Site.me();
    }

}
